## Replication code for "City Limits to Partisan Polarization" by Amalie Jensen,
## William Marble, Kenneth Scheve, and Matthew Slaughter


# This script contains functions used for summarizing and manipulating the
# hierarchical model results.




# Extract regression results ----------------------------------------------


# this function puts the higher-level regression coefficients into a nice
# table, that optionally labels what the covariate is and what the 
# conjoint level is. also reports central 100*(1-alpha)% CIs.
extract_gamma = function(params, coefname, cov_names = NULL, level_names = NULL, alpha = .05){
  
  require(assertthat)
  assert_that(coefname %in% names(params), msg = "coefname is not one of the parameters!")
  tmp = params[[coefname]]
  
  # get dimensions and put params into an array if coefname == intercept
  niter = dim(tmp)[1]
  if (!grepl("intercept", coefname)){
    nlevs = dim(tmp)[2]
    ncovs = dim(tmp)[3]
    params = tmp
    rm(tmp)
  } else {
    nlevs = 1
    ncovs = dim(tmp)[2]
    params = array(NA, dim = c(niter, nlevs, ncovs))
    params[,1,] = tmp
  }
  
  
  # create df to store results
  lcl_name = paste0("p", (alpha/2))
  ucl_name = paste0("p", 1 - (alpha/2))
  coefout = data.frame(coef = rep(NA, nlevs * ncovs),
                       level = NA,
                       cov_num = NA,
                       post_mean = NA,
                       post_sd = NA)
  coefout[[lcl_name]] = NA
  coefout[[ucl_name]] = NA
  
  k = 1
  for (i in 1:nlevs){
    for (j in 1:ncovs){
      this_coef_name = paste0(coefname, "_", i, "_", j)
      coefout$coef[k] = this_coef_name
      
      # get level and covariate number for easy labeling later
      coefout$level[k] = paste0(gsub("gamma_", "", coefname), "_", i)
      coefout$cov_num[k] = j
      
      # extract samples from posterior and summarize
      samples = params[,i,j]
      coefout$post_mean[k] = mean(samples)
      coefout$post_sd[k] = sd(samples)
      coefout[[lcl_name]][k] = quantile(samples, alpha / 2)
      coefout[[ucl_name]][k] = quantile(samples, 1 - (alpha / 2))
      k = k + 1 
    }
  }
  
  # Add names of individual-level covariates
  if (!is.null(cov_names)){
    if (length(cov_names) != length(unique(coefout$cov_num))){
      warning("Supplied covariate labels are not the same length as what's found in the params. Not labeling output.")
    } else {
      cov_names = data.frame(cov_num = 1:length(cov_names), cov_names = cov_names)
      coefout = merge(cov_names, coefout, by = "cov_num")
      coefout$cov_num = NULL
    }
  }
  
  # Add levels in conjoint 
  if (!is.null(level_names)){
    if (length(level_names) != length(unique(coefout$level))){
      warning("Supplied level names are not the same length as what's found in the params. Not labeling output.")
    } else {
      coefout$tmp = as.integer(substr(coefout$level, nchar(coefout$level), nchar(coefout$level)))
      coefout$level = NULL
      level_names = data.frame(tmp = 1:length(level_names), level_names = level_names)
      coefout = merge(level_names, coefout, by = "tmp")
      coefout$tmp = NULL
    }
  }
  return(coefout)
}





# Make Regression Tables --------------------------------------------------


## This function takes the results of the extract_gamma function and creates
## a LaTeX regression table of the results. 


make_table = function(x,                   # output from extract_gamma
                      cov_order,           # order covariates should appear in table
                      level_order,         # order factor levels should appear as columns in table
                      level_labels = NULL, # level labels
                      scale = 1,           # multiply coefficients by this number before printing
                      digits = 2,          # number of decimal points
                      omit = NULL,         # any covariates to omit from the output, regex or char string
                      ci = FALSE,          # if true, print CIs rather than posterior SD's
                      alpha = .05,         # what is the significance level
                      aster = FALSE,       # if true, star for given alpha level when CI = FALSE
                      out.file = NULL,     # where to print the latex table; if null, print to console
                      align = TRUE,        # use dcolumn to align decimal pts
                      checks = NULL,       # rows of checkmarks to include
                      ...                  # want to be able to add lines as notes
                      ){
  require(assertthat)
  assert_that(all(cov_order %in% x$cov_names))
  assert_that(all(level_order %in% x$level_names))
  
  lcl_name = paste0("p", (alpha/2))
  ucl_name = paste0("p", 1 - (alpha/2))
  if (ci){
    assert_that(lcl_name %in% names(x), ucl_name %in% names(x), 
                msg = "alpha level in make_table must match alpha level in extract_gamma when ci=TRUE.")
  }
  
  # get rid of omitted params
  if (!is.null(omit)){
    # assume regex if length(omit) == 1, otherwise a list of covariates
    regex = length(omit) == 1
    if (!regex){
      x = subset(x, !cov_names %in% omit)
      cov_order = cov_order[!cov_order %in% omit]
    } else {
      x = subset(x, !grepl(omit, cov_names))
      cov_order = cov_order[!grepl(omit, cov_order)]
    }
  }
  
  # reorder the x data frame and scale the coefficients
  x = x %>% 
    mutate(levorder = as.numeric(factor(level_names, level_order)),
           covorder = as.numeric(factor(cov_names, cov_order))) %>% 
    arrange(levorder, covorder) %>% 
    mutate(post_mean = post_mean * scale,
           post_sd   = post_sd * scale, 
           t_stat    = post_mean / post_sd, 
           pval      = (1 - pnorm(abs(t_stat))) * 2,
           signif    = ifelse(pval < alpha, 1, 0))
  x[[lcl_name]] = x[[lcl_name]] * scale
  x[[ucl_name]] = x[[ucl_name]] * scale
  
  # get number of columns
  num_cols = length(level_order)
  
  # If you supply your own level labels, they'll probably have a 
  # line break in them. Make sure to include the same number of 
  # line breaks in each label. 
  if(!is.null(level_labels)){
    assert_that(length(level_labels) == length(level_order))
    level_labels2 = strsplit(level_labels, "\\\\n")
    level_labels1 = sapply(level_labels2, function(x) x[[1]])
    level_labels2 = sapply(level_labels2, function(x) x[[2]])
    level_labels2 = rbind(level_labels1, level_labels2)
    n_header_rows = nrow(level_labels2)
  } else{
    level_labels2 = matrix(level_order, nrow = 1)
    n_header_rows = 1
  }
  
  # start creating tabular{} table
  if (align){
    first = paste0("\\begin{tabular}{r", paste0(rep(paste0("D{.}{.}{", digits, "}"), num_cols), collapse=""), "}\\\\ \\toprule")
  } else {
    first = paste0("\\begin{tabular}{r", paste0(rep("c", num_cols), collapse=""), "}\\\\ \\toprule")
  }
  
  
  header_rows = matrix("", nrow=n_header_rows, ncol=1)
  for (i in 1:n_header_rows){
    tmp_lev = level_labels2[i,]
    tmp_lev = paste("\\multicolumn{1}{r}{", tmp_lev, "}", sep = "")
    tmp_lev = paste0(paste0(c("", tmp_lev), collapse = "   &   "), "  \\\\")
    header_rows[i,] = tmp_lev
  }
  header_rows[n_header_rows,] = paste0(header_rows[n_header_rows,], "\\midrule")
  header_rows = as.vector(header_rows)
  middle = matrix("", nrow = length(cov_order)*2, ncol = num_cols+1)
  middle[seq(1, nrow(middle), 2),1] = cov_order
  
  printfmt = paste0("%.", digits, "f")
  for (j in 1:num_cols){
    this_level = level_order[j]
    tmp = x %>% filter(level_names == this_level)
    
    # fill in posterior mean of coefficients
    if (align){
      this_mean = sprintf(printfmt, tmp$post_mean)
      
      # add asterisk if requested
      if (aster){
        this_signif = tmp$signif
        this_mean   = paste0(this_mean, ifelse(this_signif, "^{*}", "")) 
      }
      middle[seq(1, nrow(middle), 2), j+1] = this_mean
    } else {
      this_mean = paste("$", sprintf(printfmt, tmp$post_mean), "$", sep = "")
      middle[seq(1, nrow(middle), 2), j+1] = this_mean
    }
    
    
    # fill in posterior CI's or SD's
    if (ci){
      this_lcl  = paste("[", sprintf(printfmt, tmp[[lcl_name]]), ", ", sep = "")
      this_ucl  = paste(sprintf(printfmt, tmp[[ucl_name]]), "]", sep = "")
      this_ci   = paste(this_lcl, this_ucl, sep = "")
      this_ci   = paste("\\multicolumn{1}{c}{$", this_ci, "$}", sep = "")
      middle[seq(2, nrow(middle), 2), j+1] = this_ci
    } else {
      this_sd   = paste("(", sprintf(printfmt, tmp$post_sd), ")", sep = "")
      middle[seq(2, nrow(middle), 2), j+1] = this_sd
    }
    
    
  }
  if (!is.null(checks)) { 
    assert_that(class(checks) == "character", msg = "checks must be a character vector.")
    
    addbottom = matrix(NA_character_, nrow = length(checks), ncol = num_cols + 1)
    addbottom[, -1] = "\\checkmark"
    for (i in 1:length(checks)) {
      addbottom[i,1] = paste0(checks[1], " Controls")
    }
    middle = rbind(middle, addbottom)
  }
  
  middle = apply(middle, 1, paste0, collapse = "    &    ")
  middle = paste(middle, "  \\\\")
  middle[length(middle)] = paste0(middle[length(middle)] , "\\bottomrule")
  bottom = "\\end{tabular}"
  
  out = c(first, header_rows, middle, bottom)
  cat(out, sep = "\n", file =  ifelse(is.null(out.file), stdout(), out.file))
}



# Add linebreaks ----------------------------------------------------------

# helper function used in generating labels
add_linebreaks = function(x) { 
  wordsplit = strsplit(x, " ")
  nwords = sapply(wordsplit, length)
  breaks = floor(nwords/2)
  for (i in 1:length(wordsplit)){
    wordsplit[[i]][breaks[i]] = paste0(wordsplit[[i]][breaks[i]], "\\n")
    wordsplit[[i]] = paste0(wordsplit[[i]], collapse = " ")
  }
  out = unlist(wordsplit)
  return(out)
}






# Summarize party results -------------------------------------------------


# Summarize conjoint heterogeneity results for party. 
# Idea is to hold all covariates at their observed levels, except for party,
# and predict individual-level IMCEs. Then summarize the distribution of 
# predicted IMCE's using kernel density estimate (apply density estimator
# to a random sample of draws from the posterior, then take the mean of the 
# pointwise density estimates). Also get overall posterior mean, plus
# the probability that the means of the two simulated distributions
# have opposite signs.

# params: results of extract(stan_output)
# factorname: which factor to summarize (educ, hieduc, invest, etc)
# dem: individual covariates used for estimation, except set everyone to strong dem
# rep: "" "" strong rep
# levs: names of the levels for this factor
# nsims: how many draws from the posterior to apply density estimator to
summarisePartyResults = function(params, factorname, dem, rep, levs, nsims = 500){
  
  require(dplyr)
  
  factorname2 = paste0("gamma_", factorname)
  stopifnot(factorname2 %in% names(params))
  stopifnot(identical(dim(rep), dim(dem)))
  
  niter = dim(params[[factorname2]])[1]
  nresp = dim(dem)[1]
  
  if (nsims > niter){
    warning("You specified nsims greater than the number of samples from the posterior. Using all available samples.")
    nsims = niter
  }
  
  # loop thru iterations and get predicted IMCEs for each 
  gammas = params[[factorname2]]
  nlevs = dim(gammas)[2]
  mce_rep <- mce_dem <- array(NA, dim = c(niter, nresp, nlevs))
  for (i in 1:niter) { 
    mce_dem[i,,] = dem %*% t(gammas[i,,])
    mce_rep[i,,] = rep %*% t(gammas[i,,])
  }
  
  # get posterior means
  dem_mean_by_iter = apply(mce_dem, c(1, 3), mean)
  rep_mean_by_iter = apply(mce_rep, c(1, 3), mean)
  dem_means = apply(dem_mean_by_iter, 2, mean)
  rep_means = apply(rep_mean_by_iter, 2, mean)
  dem_sem = apply(dem_mean_by_iter, 2, function(x) sd(x) / sqrt(length(x)))
  rep_sem = apply(rep_mean_by_iter, 2, function(x) sd(x) / sqrt(length(x)))
  
  
  # get posterior probability that the means of the AMCEs under the D/R 
  # simulations have different signs.
  opp_signs = matrix(NA, nrow = niter, ncol = nlevs)
  for (k in 1:nlevs){
    opp_signs[, k] = sign(dem_mean_by_iter[,k]) != sign(rep_mean_by_iter[,k])
  }
  pr_opp_signs = apply(opp_signs, 2, mean)
  
  # put means and p-val into a table
  means_out = data.frame(
    factor = factorname,
    level = 1:nlevs,
    dem_mean = dem_means,
    dem_sem = dem_sem,
    rep_mean = rep_means,
    rep_sem = rep_sem,
    party_p_val = pr_opp_signs,
    stringsAsFactors = FALSE
  )
  
  
  # now estimate density (cross-sectionally) for each of
  # of random sample iterations 1,..., nsims 
  samp_iter = base::sample(1:niter, nsims)
  dens_out = list()
  for (level in 1:nlevs){
    
    # number of points at which to evaluate density
    n_x0 = 400
    
    # create grid to estimate density at these points
    top = ceiling(max(max(mce_dem[samp_iter,,level]), min(mce_dem[samp_iter,,level]), max(mce_rep[samp_iter,,level]), min(mce_rep[samp_iter,,level])) * 10) / 10
    bot = floor(min(max(mce_dem[samp_iter,,level]), min(mce_dem[samp_iter,,level]), max(mce_rep[samp_iter,,level]), min(mce_rep[samp_iter,,level])) * 10) / 10
    grid = seq(bot, top, length.out = n_x0)
    
    dem_dens = apply(mce_dem[samp_iter,,level], 1, density, from = bot, to = top, n = n_x0, bw = "nrd", kernel = "gaus")
    rep_dens = apply(mce_rep[samp_iter,,level], 1, density, from = bot, to = top, n = n_x0, bw = "nrd", kernel = "gaus")
    
    # get expected density and 10th/90th quantile
    dem_out = sapply(dem_dens, function(x) x$y)
    dem_mean = apply(dem_out, 1, mean)
    dem_10 = apply(dem_out, 1, quantile, probs = .1)
    dem_90 = apply(dem_out, 1, quantile, probs = .9)
    dem_out = data.frame(party = "Strong Democrat", factor = factorname, 
                         level = level, x = grid, dens_mean = dem_mean,
                         dens_10 = dem_10, dens_90 = dem_90)
    
    rep_out = sapply(rep_dens, function(x) x$y)
    rep_mean = apply(rep_out, 1, mean)
    rep_10 = apply(rep_out, 1, quantile, probs = .1)
    rep_90 = apply(rep_out, 1, quantile, probs = .9)
    rep_out = data.frame(party = "Strong Republican", factor = factorname, 
                         level = level, x = grid, dens_mean = rep_mean,
                         dens_10 = rep_10, dens_90 = rep_90)
    
    dens_out[[level]] = rbind(dem_out, rep_out)
  }
  
  dens_out = bind_rows(dens_out)
  
  # return both means + densities
  ret = list(density = dens_out,
             mean = means_out)
  return(ret)
}



